#include "slre.h"
#include "tpl.h"

#include "term.h"

/** @brief tpl packing format for a term.
 *
 * The pack elements are, in order:
 * 1. term type (`c`, one char);
 * 2. term data (`s`, NUL-terminated string);
 * 3. term metadata (`U`, 8-byte unsigned): for literals this carries the
 *    datatype pointer value, for language-tagged literals the raw bytes of
 *    the language tag. It is left at zero for other term types.
 */
#define TERM_PACK_FMT "csU"

#define MAX_VALID_TERM_TYPE     LSUP_TERM_BNODE ///< Upper bound used by term_init() to validate a term type.
#define NCAPS 11        ///< Size of the capture array passed to slre_match() for the URI regex.


/*
 * Data structures.
 */

/** @brief Extra information attached to IRI reference terms.
 *
 * The capture members describe slices (start pointer + length) of an IRI
 * string; they are filled in when the term is initialized.
 */
struct iri_info_t {
    /** Namespace map handle; used for namespace-prefixed IRIs. */
    LSUP_NSMap *nsm;
    /** IRI prefix slice (matching group #1). */
    struct slre_cap prefix;
    /** IRI path slice (matching group #5). */
    struct slre_cap path;
    /** IRI fragment slice (matching group #10). */
    struct slre_cap frag;
};


/*
 * Extern variables.
 */

// Global term cache, keyed by term hash. Read by LSUP_tcache_get() and
// populated by LSUP_tcache_add(). It is NULL here and must be created
// elsewhere before those functions are used.
struct hashmap *LSUP_term_cache = NULL;
// NOTE(review): presumably the cache key of the default datatype; it is not
// referenced in this file — confirm against the initialization code.
uint32_t LSUP_default_dtype_key = 0;
// Datatype assigned by term_init() to literals created without an explicit
// datatype. NULL until set elsewhere.
LSUP_Term *LSUP_default_datatype = NULL;


/*
 * Static variables.
 */

// Characters not allowed in a URI string. term_init() rejects any IRI whose
// fully qualified form contains one of these (checked with strpbrk()).
static const char *invalid_uri_chars = "<>\" {}|\\^`";


/*
 * Static prototypes.
 */

// Populate an already allocated term with the given type, data and metadata.
// Forward declaration: the definition is in the "Internal functions" section
// below, after its first use in LSUP_term_new().
static LSUP_rc
term_init (
        LSUP_Term *term, LSUP_TermType type, const char *data, void *metadata);


/*
 * Term API.
 */

/** @brief Allocate a new term and initialize it.
 *
 * @param[in] type Term type. With LSUP_TERM_UNDEFINED only the type is set
 *  and the other arguments are ignored.
 * @param[in] data Term data, handed to term_init().
 * @param[in] metadata Type-dependent metadata, handed to term_init().
 *
 * @return New term handle, or NULL if allocation or initialization failed.
 */
LSUP_Term *
LSUP_term_new (
        LSUP_TermType type, const char *data, void *metadata)
{
    LSUP_Term *term;
    CALLOC_GUARD (term, NULL);

    // An undefined term is just a zeroed structure carrying its type.
    if (type == LSUP_TERM_UNDEFINED) {
        term->type = type;
        return term;
    }

    // Every other type goes through full initialization.
    if (UNLIKELY (term_init (term, type, data, metadata) != LSUP_OK)) {
        free (term);
        return NULL;
    }

    return term;
}


/** @brief Create a new term with the same type, data and metadata as another.
 *
 * The metadata handed to the new term depends on the source type: the
 * namespace map for IRI refs, the datatype for literals, the language tag for
 * language-tagged literals, and NULL for anything else.
 *
 * @param[in] src Term to copy.
 *
 * @return Result of LSUP_term_new() for the copied values.
 */
LSUP_Term *
LSUP_term_copy (const LSUP_Term *src)
{
    void *metadata;

    if (LSUP_IS_IRI (src)) {
        metadata = (void *) LSUP_iriref_nsm (src);
    } else {
        switch (src->type) {
            case LSUP_TERM_LITERAL:
                metadata = (void *) src->datatype;
                break;

            case LSUP_TERM_LT_LITERAL:
                metadata = (void *) src->lang;
                break;

            default:
                metadata = NULL;
                break;
        }
    }

    return LSUP_term_new (src->type, src->data, metadata);
}


LSUP_Term *
LSUP_term_new_from_buffer (const LSUP_Buffer *sterm)
{
    if (UNLIKELY (!sterm)) return NULL;

    LSUP_Term *term = NULL;
    LSUP_TermType type;
    char *data = NULL;
    void *metadata;

    tpl_node *tn;

    tn = tpl_map (TERM_PACK_FMT, &type, &data, &metadata);
    if (UNLIKELY (!tn)) goto finally;

    if (UNLIKELY (tpl_load (tn, TPL_MEM, sterm->addr, sterm->size) < 0)) {
        log_error ("Error loading serialized term.");
        goto finally;
    }
    if (UNLIKELY (tpl_unpack (tn, 0) < 0)) {
        log_error ("Error unpacking serialized term.");
        goto finally;
    }

    if (type == LSUP_TERM_LT_LITERAL)
        term = LSUP_lt_literal_new (data, (char *)&metadata);
    else term = LSUP_term_new (type, data, metadata);

finally:
    tpl_free (tn);
    free (data);

    return term;
}


/** @brief Create an IRI ref by resolving an IRI against a root IRI.
 *
 * - If `iri` already has a prefix, the result has the same data as `iri`.
 * - If `iri` starts with `/`, the result is the prefix of `root` followed by
 *   the data of `iri`. A root without a prefix contributes an empty string.
 * - Otherwise the result is the data of `root` followed by the data of `iri`.
 *
 * @param[in] root Root IRI. Must be an IRI ref term.
 * @param[in] iri IRI to resolve. Must be an IRI ref term.
 *
 * @return New IRI ref term (created with a NULL namespace map) that the
 *  caller must free, or NULL on invalid input or allocation failure.
 */
LSUP_Term *
LSUP_iriref_absolute (const LSUP_Term *root, const LSUP_Term *iri)
{
    if (! LSUP_IS_IRI (iri)) {
        log_error ("Provided path is not an IRI.");
        return NULL;
    }
    if (! LSUP_IS_IRI (root)) {
        log_error ("Provided root is not an IRI.");
        return NULL;
    }

    // An IRI that carries its own prefix is copied as is.
    char *pfx = LSUP_iriref_prefix (iri);
    if (pfx) {
        free (pfx);
        return LSUP_iriref_new (iri->data, NULL);
    }

    // Pick the string to prepend to the IRI data.
    char *root_pfx = NULL;
    const char *head;
    if (iri->data[0] == '/') {
        root_pfx = LSUP_iriref_prefix (root);
        // The root may have no prefix at all; do not pass NULL to strlen().
        head = root_pfx ? root_pfx : "";
    } else head = root->data;

    LSUP_Term *ret = NULL;
    size_t size = strlen (head) + strlen (iri->data) + 1;
    char *data = malloc (size);
    if (data) {
        snprintf (data, size, "%s%s", head, iri->data);
        ret = LSUP_iriref_new (data, NULL);
        free (data);
    }
    // Released on every path, including allocation failure.
    free (root_pfx);

    return ret;
}


/** @brief Create an IRI ref relative to a root IRI.
 *
 * If the data of `iri` begins with the data of `root`, that leading part is
 * stripped; otherwise the full data of `iri` is used. The new term receives
 * the namespace map of `iri`.
 *
 * @param[in] root Root IRI. Must be an IRI ref term.
 * @param[in] iri IRI to make relative. Must be an IRI ref term.
 *
 * @return New IRI ref term, or NULL if either argument is not an IRI.
 */
LSUP_Term *
LSUP_iriref_relative (const LSUP_Term *root, const LSUP_Term *iri)
{
    if (! LSUP_IS_IRI (iri)) {
        log_error ("Provided path is not an IRI.");
        return NULL;
    }
    if (! LSUP_IS_IRI (root)) {
        log_error ("Provided root is not an IRI.");
        return NULL;
    }

    // Skip the root only when it is a leading substring of the IRI.
    const size_t root_len = strlen (root->data);
    const char *rel_data = iri->data;
    if (strncmp (iri->data, root->data, root_len) == 0) rel_data += root_len;

    return LSUP_iriref_new (rel_data, LSUP_iriref_nsm (iri));
}


/** @brief Serialize a term into a newly allocated buffer.
 *
 * The term is packed with TERM_PACK_FMT: type, data, and an 8-byte metadata
 * element (datatype pointer value for literals, language tag bytes for
 * language-tagged literals, zero otherwise).
 *
 * @param[in] term Term to serialize. May be NULL.
 *
 * @return New buffer that the caller must free with LSUP_buffer_free(), or
 *  NULL if the term is NULL, cannot be normalized, or on allocation or
 *  packing failure.
 */
LSUP_Buffer *
LSUP_term_serialize (const LSUP_Term *term)
{
    /*
     * In serializing a term, the fact that two terms of different types may
     * be semantically identical must be taken into account. Specifically, a
     * namespace-prefixed IRI ref is identical to its fully qualified version,
     * and a LSUP_TERM_LT_LITERAL with no language tag is identical to a
     * LSUP_TERM_LITERAL of xsd:string type, made up of the same string. Such
     * terms must have identical serializations.
     */

    if (UNLIKELY (!term)) return NULL;

    LSUP_Term *tmp_term;
    // Storage for the 'U' pack element, which is always 8 bytes wide.
    uint64_t metadata = 0;

    if (term->type == LSUP_TERM_NS_IRIREF) {
        // For IRI refs, simply serialize the FQ version of the term.
        char *fq_uri;

        if (LSUP_nsmap_normalize_uri (
            term->iri_info->nsm, term->data, &fq_uri
        ) != LSUP_OK) return NULL;

        tmp_term = LSUP_iriref_new (fq_uri, NULL);
        free (fq_uri);

    } else if (term->type == LSUP_TERM_LT_LITERAL) {
        // For LT literals with empty lang tag, convert to a normal xsd:string.
        if (strlen (term->lang) == 0)
            tmp_term = LSUP_literal_new (term->data, NULL);
        else tmp_term = LSUP_lt_literal_new (term->data, (char *) term->lang);

    } else tmp_term = LSUP_term_new (
            term->type, term->data, (void *) term->datatype);
    // "datatype" can be anything here since it's cast to void *.

    // Creating the normalized copy may fail; do not dereference NULL.
    if (UNLIKELY (!tmp_term)) return NULL;

    // metadata field is ignored for IRI ref.
    if (tmp_term->type == LSUP_TERM_LITERAL)
        metadata = (uint64_t) (uintptr_t) tmp_term->datatype;
    else if (tmp_term->type == LSUP_TERM_LT_LITERAL) {
        // Never read past the tag nor write past the metadata slot.
        size_t lang_size = sizeof (tmp_term->lang) < sizeof (metadata)
                ? sizeof (tmp_term->lang) : sizeof (metadata);
        memcpy (&metadata, tmp_term->lang, lang_size);
    }

    LSUP_Buffer *sterm = malloc (sizeof (*sterm));
    if (UNLIKELY (!sterm)) {
        LSUP_term_free (tmp_term);
        return NULL;
    }
    // Keep the buffer in a defined state in case packing fails before tpl
    // stores the output address and size.
    sterm->addr = NULL;
    sterm->size = 0;

    int rc = tpl_jot (
            TPL_MEM, &sterm->addr, &sterm->size, TERM_PACK_FMT,
            &tmp_term->type, &tmp_term->data, &metadata);
    LSUP_term_free (tmp_term);

    if (rc != 0) {
        LSUP_buffer_free (sterm);
        return NULL;
    }

    return sterm;
}


/** @brief Compute the hash of a term from its serialized form.
 *
 * A NULL term is hashed through BUF_DUMMY instead of a serialization.
 *
 * @param[in] term Term to hash. May be NULL.
 *
 * @return Hash of the buffer, as returned by LSUP_buffer_hash().
 */
LSUP_Key
LSUP_term_hash (const LSUP_Term *term)
{
    LSUP_Buffer *sterm = (
            UNLIKELY (!term) ? BUF_DUMMY : LSUP_term_serialize (term));

    const LSUP_Key hash = LSUP_buffer_hash (sterm);

    // The buffer is only needed to derive the hash.
    LSUP_buffer_free (sterm);

    return hash;
}


/** @brief Release a term and the resources it owns.
 *
 * Frees the IRI information structure (IRI refs only), the data string and
 * the term itself. Other union members, such as a literal's datatype, are
 * not freed here.
 *
 * @param[in] term Term to free. NULL is accepted and ignored.
 */
void
LSUP_term_free (LSUP_Term *term)
{
    if (term == NULL) return;

    // Only IRI refs carry a separately allocated info structure.
    if (LSUP_IS_IRI (term)) free (term->iri_info);

    free (term->data);
    free (term);
}


/** @brief Get the namespace map handle stored in an IRI ref term.
 *
 * @param[in] iri IRI ref term (plain or namespace-prefixed).
 *
 * @return The stored namespace map handle, which may be NULL; NULL with an
 *  error log if the term is not an IRI ref.
 */
LSUP_NSMap *
LSUP_iriref_nsm (const LSUP_Term *iri)
{
    switch (iri->type) {
        case LSUP_TERM_IRIREF:
        case LSUP_TERM_NS_IRIREF:
            return iri->iri_info->nsm;

        default:
            log_error ("Term is not a IRI ref type.");
            return NULL;
    }
}


char *
LSUP_iriref_prefix (const LSUP_Term *iri)
{
    if (iri->type != LSUP_TERM_IRIREF && iri->type != LSUP_TERM_NS_IRIREF) {
        log_error ("Term is not a IRI ref type.");
        return NULL;
    }

    if (iri->iri_info->prefix.ptr == NULL) return NULL;
    if (iri->iri_info->prefix.len == 0) return NULL;

    return strndup (iri->iri_info->prefix.ptr, iri->iri_info->prefix.len);
}


char *
LSUP_iriref_path (const LSUP_Term *iri)
{
    if (iri->type != LSUP_TERM_IRIREF && iri->type != LSUP_TERM_NS_IRIREF) {
        log_error ("Term is not a IRI ref type.");
        return NULL;
    }

    if (iri->iri_info->path.ptr == NULL) return NULL;
    if (iri->iri_info->path.len == 0) return NULL; // TODO redundant?

    return strndup (iri->iri_info->path.ptr, iri->iri_info->path.len);
}


char *
LSUP_iriref_frag (const LSUP_Term *iri)
{
    if (iri->type != LSUP_TERM_IRIREF && iri->type != LSUP_TERM_NS_IRIREF) {
        log_error ("Term is not a IRI ref type.");
        return NULL;
    }

    if (iri->iri_info->frag.ptr == NULL) return NULL;

    return strndup (iri->iri_info->frag.ptr, iri->iri_info->frag.len);
}


/*
 * Triple API.
 */

/** @brief Allocate a triple and set its terms.
 *
 * The term handles are stored as given, not copied.
 *
 * @param[in] s Subject term.
 * @param[in] p Predicate term.
 * @param[in] o Object term.
 *
 * @return New triple, or NULL if allocation or initialization failed.
 */
LSUP_Triple *
LSUP_triple_new(LSUP_Term *s, LSUP_Term *p, LSUP_Term *o)
{
    LSUP_Triple *trp = malloc (sizeof (*trp));

    // Drop the allocation if the triple cannot be initialized.
    if (trp && UNLIKELY (LSUP_triple_init (trp, s, p, o))) {
        free (trp);
        trp = NULL;
    }

    return trp;
}


/** @brief Build a triple by deserializing each term of a buffer triple.
 *
 * Individual terms are not checked: a term that fails to deserialize is
 * stored as NULL in the returned triple.
 *
 * @param[in] sspo Serialized triple.
 *
 * @return New triple, or NULL if the triple itself cannot be allocated.
 */
LSUP_Triple *
LSUP_triple_new_from_btriple (const LSUP_BufferTriple *sspo)
{
    LSUP_Triple *trp = malloc (sizeof (*trp));

    if (trp) {
        trp->s = LSUP_term_new_from_buffer (sspo->s);
        trp->p = LSUP_term_new_from_buffer (sspo->p);
        trp->o = LSUP_term_new_from_buffer (sspo->o);
    }

    return trp;
}


/** @brief Serialize each term of a triple into a new buffer triple.
 *
 * Individual buffers are not checked: a term that fails to serialize is
 * stored as NULL in the returned structure.
 *
 * @param[in] spo Triple to serialize.
 *
 * @return New buffer triple, or NULL if it cannot be allocated.
 */
LSUP_BufferTriple *
LSUP_triple_serialize (const LSUP_Triple *spo)
{
    LSUP_BufferTriple *btrp = malloc (sizeof (*btrp));

    if (btrp) {
        btrp->s = LSUP_term_serialize (spo->s);
        btrp->p = LSUP_term_serialize (spo->p);
        btrp->o = LSUP_term_serialize (spo->o);
    }

    return btrp;
}


/** @brief Set the terms of an already allocated triple.
 *
 * The term handles are stored as given. No validation of the term types is
 * currently performed (see the disabled checks below).
 *
 * @param[out] spo Triple to initialize.
 * @param[in] s Subject term.
 * @param[in] p Predicate term.
 * @param[in] o Object term.
 *
 * @return LSUP_OK.
 */
LSUP_rc
LSUP_triple_init (LSUP_Triple *spo, LSUP_Term *s, LSUP_Term *p, LSUP_Term *o)
{
    /* FIXME Term type validation is disabled: TRP_DUMMY is a problem here.
    if (! LSUP_IS_IRI (s) && s->type != LSUP_TERM_BNODE) {
        log_error ("Subject is not of a valid term type: %d", s->type);
        return LSUP_VALUE_ERR;
    }
    if (! LSUP_IS_IRI (p)) {
        log_error ("Predicate is not of a valid term type: %d", p->type);
        return LSUP_VALUE_ERR;
    }
    */

    spo->o = o;
    spo->p = p;
    spo->s = s;

    return LSUP_OK;
}


/** @brief Free the terms of a triple, but not the triple structure itself.
 *
 * The term pointers inside the triple are left untouched after being freed.
 *
 * @param[in] spo Triple whose terms are freed. NULL is accepted and ignored.
 */
void
LSUP_triple_done (LSUP_Triple *spo)
{
    if (UNLIKELY (spo == NULL)) return;

    LSUP_Term *terms[] = {spo->s, spo->p, spo->o};

    for (size_t i = 0; i < sizeof (terms) / sizeof (terms[0]); i++)
        LSUP_term_free (terms[i]);
}


/** @brief Free a triple together with its three terms.
 *
 * @param[in] spo Triple to free. NULL is accepted and ignored.
 */
void
LSUP_triple_free (LSUP_Triple *spo)
{
    if (spo != NULL) {
        // Release the terms first, then the container.
        LSUP_term_free (spo->s);
        LSUP_term_free (spo->p);
        LSUP_term_free (spo->o);

        free (spo);
    }
}


/** @brief Add a term to the global term cache unless its key is present.
 *
 * The cache entry stores the term pointer as given; the term is not copied.
 *
 * @param[in] key Key to store the term under.
 * @param[in] term Term to cache.
 *
 * @return LSUP_NOACTION if an entry for the key already exists; LSUP_OK
 *  after attempting the insertion.
 */
LSUP_rc
LSUP_tcache_add (const LSUP_Key key, const LSUP_Term *term)
{
    LSUP_KeyedTerm entry = {.term = (LSUP_Term *) term, .key = key};

    // Many calls will likely attempt inserting duplicates after the first one.
    if (LIKELY (hashmap_get (LSUP_term_cache, &entry) != NULL))
        return LSUP_NOACTION;

    hashmap_set (LSUP_term_cache, &entry);

    return LSUP_OK;
}


/** @brief Look up a term in the global term cache.
 *
 * @param[in] key Key to look up.
 *
 * @return The cached term, still owned by the cache entry, or NULL if the
 *  key is not present.
 */
const LSUP_Term *
LSUP_tcache_get (LSUP_Key key)
{
    // Only the key of the probe is set; the term member stays zeroed.
    LSUP_KeyedTerm probe = {.key = key};
    LSUP_KeyedTerm *hit = hashmap_get (LSUP_term_cache, &probe);

    if (!hit) {
        log_trace ("No ID found for key %lx.", key);
        return NULL;
    }

    log_trace ("ID found for key %lx: %s", key, hit->term->data);

    return hit->term;
}


/*
 * Internal functions.
 */

static LSUP_rc
term_init (
        LSUP_Term *term, LSUP_TermType type,
        const char *data, void *metadata)
{
    // This can never be LSUP_TERM_UNDEFINED.
    if (type <= LSUP_TERM_UNDEFINED || type > MAX_VALID_TERM_TYPE) {
        log_error ("%d is not a valid term type.", type);
        return LSUP_VALUE_ERR;
    }

    term->type = type;

    if (data) {
        // Validate IRI.
        if (LSUP_IS_IRI (term)) {
            char *fquri;

            // Find fully qualified IRI to parse.
            if (term->type == LSUP_TERM_NS_IRIREF) {
                if (LSUP_nsmap_normalize_uri (
                    metadata, data, &fquri) != LSUP_OK
                ) {
                    log_error ("Error normalizing IRI data.");

                    return LSUP_VALUE_ERR;
                }
                log_debug ("Fully qualified IRI: %s", fquri);
            } else fquri = (char *) data;

            if (strpbrk (fquri, invalid_uri_chars) != NULL) {
                log_error (
                        "Characters %s are not allowed. Got: %s\n",
                        invalid_uri_chars, fquri);

                return LSUP_VALUE_ERR;
            }

            // Capture interesting IRI parts.
            struct slre_cap matches[NCAPS];
            int re_rc = slre_match (
                    LSUP_URI_REGEX_STR, fquri, strlen(fquri),
                    matches, NCAPS, 0);
            if (re_rc < 0) {
                log_error ("Error matching URI pattern: %d.", re_rc);

                return LSUP_VALUE_ERR;
            }
            if (term->type == LSUP_TERM_NS_IRIREF) free (fquri);

            MALLOC_GUARD (term->iri_info, LSUP_MEM_ERR);

            term->iri_info->prefix = matches[1];
            term->iri_info->path = matches[5];
            term->iri_info->frag = matches[10];
            term->iri_info->nsm = metadata;
        }

        term->data = strdup (data);

    } else {
        // No data. Make up a random UUID or URI if allowed.
        if (type == LSUP_TERM_IRIREF || type == LSUP_TERM_BNODE) {
            uuid_t uuid;
            uuid_generate_random (uuid);

            uuid_str_t uuid_str;
            uuid_unparse_lower (uuid, uuid_str);

            if (type == LSUP_TERM_IRIREF) {
                term->data = malloc (UUID4_URN_SIZE);
                snprintf (
                        term->data, UUID4_URN_SIZE, "urn:uuid4:%s", uuid_str);

                MALLOC_GUARD (term->iri_info, LSUP_MEM_ERR);

                // Allocate IRI match patterns manually.
                term->iri_info->prefix.ptr = term->data;
                term->iri_info->prefix.len = 4; // strlen("urn:")
                term->iri_info->path.ptr = term->data + 4;
                term->iri_info->path.len = UUIDSTR_SIZE + 6; // "uuid4:[...]"
                term->iri_info->frag.ptr = NULL;
                term->iri_info->frag.len = 0;
                term->iri_info->nsm = NULL;

            } else term->data = strdup (uuid_str);
        } else {
            log_error ("No data provided for term.");
            return LSUP_VALUE_ERR;
        }
    }

    if (term->type == LSUP_TERM_LT_LITERAL) {
        if (!metadata) {
            log_warn ("Lang tag is NULL. Creating a non-tagged literal.");
            term->type = LSUP_TERM_LITERAL;
        } else {
            char *lang_str = (char *) metadata;
            log_trace("Lang string: %s", lang_str);
            // Lang tags longer than 7 characters will be truncated.
            strncpy(term->lang, lang_str, sizeof (term->lang) - 1);
            if (strlen (term->lang) < 1) {
                log_error ("Lang tag cannot be an empty string.");
                return LSUP_VALUE_ERR;
            }
            term->lang[7] = '\0';
        }
    }

    if (term->type == LSUP_TERM_LITERAL) {
        term->datatype = metadata;
        if (! term->datatype) term->datatype = LSUP_default_datatype;
        log_trace ("Storing data type: %s", term->datatype->data);

        if (! LSUP_IS_IRI (term->datatype)) {
            log_error (
                    "Literal data type is not an IRI: %s",
                    term->datatype->data);

            return LSUP_VALUE_ERR;
        }

        uint32_t dtype_hash = LSUP_term_hash (term->datatype);

        const LSUP_Term *tmp = LSUP_tcache_get (dtype_hash);
        if (!tmp) LSUP_tcache_add (dtype_hash, term->datatype);
        else if (term->datatype != tmp) {
            if (term->datatype != LSUP_default_datatype)
                LSUP_term_free (term->datatype);
            term->datatype = (LSUP_Term *)tmp;
        }

        //log_trace ("Datatype address: %p", term->datatype);
        log_trace ("Datatype hash: %lx", LSUP_term_hash (term->datatype));

    } else if (term->type == LSUP_TERM_BNODE) {
        // TODO This is not usable for global skolemization.
        term->bnode_id = LSUP_HASH (
                term->data, strlen (term->data) + 1, LSUP_HASH_SEED);
    }

    return LSUP_OK;
}


/*
 * Extern inline functions.
 */

// NOTE(review): these file-scope declarations presumably exist so that this
// translation unit provides the external definitions of functions defined
// `inline` in term.h — confirm against the header. LSUP_term_hash() is also
// defined in full earlier in this file.
LSUP_Key LSUP_term_hash (const LSUP_Term *term);
LSUP_Term *LSUP_iriref_new (const char *data, LSUP_NSMap *nsm);
LSUP_Term *LSUP_literal_new (const char *data, LSUP_Term *datatype);
LSUP_Term *LSUP_lt_literal_new (const char *data, char *lang);
LSUP_Term *LSUP_bnode_new (const char *data);
bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2);
LSUP_Term *LSUP_triple_pos (const LSUP_Triple *trp, LSUP_TriplePos n);
LSUP_Key LSUP_triple_hash (const LSUP_Triple *trp);
